package org.wisdomdata.selenium.processor;

import java.util.logging.Logger;

import org.openqa.selenium.WebDriver;
import org.springframework.beans.factory.annotation.Required;
import org.wisdomdata.framework.Processor;
import org.wisdomdata.selenium.strategy.RecrawlStrategy;


/**
 * Processor that repeatedly extracts the current page, runs its child
 * processors and then performs the configured action (e.g. a "next page"
 * click) until there is nothing left to crawl.
 *
 * <p>How many iterations are needed is decided by the configured
 * {@link RecrawlStrategy}; the loop can additionally be capped with
 * {@link #setMaxTimes(int)} and slowed down with
 * {@link #setClickInterval(long)}.
 */
public class SeleniumClickLoopProcessor extends SeleniumActionProcessor {
	private final static Logger logger =
			Logger.getLogger(SeleniumClickLoopProcessor.class.getName());

	// Maximum number of loop iterations; some pages must not be clicked
	// through endlessly. A value <= 0 means "no cap".
	private int maxTimes = 0;

	/** @return the iteration cap; a value {@code <= 0} means no cap is applied */
	public int getMaxTimes() {
		return maxTimes;
	}

	/** @param maxTimes the iteration cap; a value {@code <= 0} disables the cap */
	public void setMaxTimes(int maxTimes) {
		this.maxTimes = maxTimes;
	}

	// Whether everything should be crawled again from scratch.
	private boolean crawlAllAgain = false;

	/** @return {@code true} if the previous crawl count is ignored and everything is crawled again */
	public boolean isCrawlAllAgain() {
		return crawlAllAgain;
	}

	/** @param crawlAllAgain {@code true} to ignore the previous crawl count and crawl everything again */
	public void setCrawlAllAgain(boolean crawlAllAgain) {
		this.crawlAllAgain = crawlAllAgain;
	}

	/**
	 * Because this page has to be clicked through repeatedly, a crawl strategy
	 * is mandatory: the first crawl fetches every page, later crawls do not
	 * fetch everything again.
	 */
	private RecrawlStrategy recrawlStrategy;

	/** @return the strategy that decides how many pages have to be crawled */
	public RecrawlStrategy getRecrawlStrategy() {
		return recrawlStrategy;
	}

	/** @param recrawlStrategy the strategy that decides how many pages have to be crawled */
	@Required
	public void setRecrawlStrategy(RecrawlStrategy recrawlStrategy) {
		this.recrawlStrategy = recrawlStrategy;
	}

	// Clicking too quickly can cause a click to be missed, so a pause between
	// clicks can be configured. The value is passed to Thread.sleep, i.e. it
	// is in milliseconds.
	private long clickInterval = 0;

	/** @return the pause between two actions, in milliseconds */
	public long getClickInterval() {
		return clickInterval;
	}

	/** @param clickInterval the pause between two actions, in milliseconds; values {@code <= 0} mean no pause */
	public void setClickInterval(long clickInterval) {
		this.clickInterval = clickInterval;
	}

	/**
	 * Runs the extract / child-process / action loop.
	 *
	 * <p>The current page is always processed at least once, because the exit
	 * conditions are only evaluated after a page has been handled. The loop
	 * ends when the action reports failure, when the number of pages requested
	 * by the strategy has been processed, when {@code maxTimes} is reached, or
	 * when the thread is interrupted while pausing between clicks. In every
	 * case the total page count read at the start is stored afterwards under
	 * the key {@code <current url>_<table name>}.
	 *
	 * @throws ClassCastException if the search context is not a {@link WebDriver}
	 */
	public void innerProcess() {
		String targetUri = ((WebDriver) this.getSearchContext()).getCurrentUrl();

		if (this.isCrawlAllAgain()) {
			// A full re-crawl is requested, so pretend nothing was crawled last time.
			this.getRecrawlStrategy().setLastTimeTotalNumber(0);
		} else {
			this.getRecrawlStrategy().setLastTimeTotalNumber(
					this.getTaskCommon().getTableNumber(targetUri + "_" + this.getTableName()));
		}

		// An action that reads the total page count must be configured here,
		// otherwise no crawl will be carried out.
		int totalNumber = this.getRecrawlStrategy().getCurrentTotalNumber();
		this.getRecrawlStrategy().setTotalNumber(totalNumber);

		// Number of pages that have to be crawled in this run.
		int remainingPages = this.getRecrawlStrategy().getNeedCrawlPageNumber();
		logger.info("SeleniumClickLoopProcessor: this time need crawl " + remainingPages + " pages!");

		int performedActions = 0;
		while (true) {
			if (!this.isCrawlAllAgain() && remainingPages == 1) {
				this.getRecrawlStrategy().setLastPage(true);
			}

			// Extract this processor's own data first ...
			this.commonExtract();
			// ... then let the child processors handle the same page.
			if (this.getChildProcessors() != null && this.getChildProcessors().size() > 0) {
				for (Processor processor : this.getChildProcessors()) {
					processor.process();
				}
			}

			if (!this.doAction()) {
				break;
			}
			performedActions++;
			remainingPages--;
			if (remainingPages <= 0
					|| (this.getMaxTimes() > 0 && performedActions >= this.getMaxTimes())) {
				break;
			}
			if (!pauseBetweenClicks()) {
				// Interrupted: stop clicking instead of ignoring the request.
				// NOTE(review): the total is still stored below, exactly as for
				// the other early exits - confirm that is the desired bookkeeping.
				break;
			}
		}
		this.getTaskCommon().setTableNumber(targetUri + "_" + this.getTableName(), totalNumber);
	}

	/**
	 * Sleeps for the configured click interval.
	 *
	 * <p>Non-positive intervals are skipped, because {@link Thread#sleep(long)}
	 * rejects negative values with an {@link IllegalArgumentException}.
	 *
	 * @return {@code false} if the thread was interrupted while sleeping (the
	 *         interrupt status is restored for the caller), {@code true} otherwise
	 */
	private boolean pauseBetweenClicks() {
		long interval = getClickInterval();
		if (interval <= 0) {
			return true;
		}
		try {
			Thread.sleep(interval);
			return true;
		} catch (InterruptedException e) {
			// Catching InterruptedException clears the flag; set it again so
			// code further up the stack can still observe the interruption.
			Thread.currentThread().interrupt();
			logger.warning("SeleniumClickLoopProcessor: interrupted while waiting between clicks: " + e);
			return false;
		}
	}

}
